Automate Etsy Data Mining with Bright Data Scrape & Google Gemini
工作流概述
这是一个包含19个节点的工作流:通过 Bright Data Web Unlocker 抓取 Etsy 搜索结果页,使用 Google Gemini 提取分页链接和商品信息,并将提取结果发送到 Webhook、写入本地 JSON 文件。
工作流源代码
{
"id": "UuuCIDvTNnloIlvq",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
"templateCredsSetupCompleted": true
},
"name": "Automate Etsy Data Mining with Bright Data Scrape & Google Gemini",
"tags": [
{
"id": "Kujft2FOjmOVQAmJ",
"name": "Engineering",
"createdAt": "2025-04-09T01:31:00.558Z",
"updatedAt": "2025-04-09T01:31:00.558Z"
},
{
"id": "ddPkw7Hg5dZhQu2w",
"name": "AI",
"createdAt": "2025-04-13T05:38:08.053Z",
"updatedAt": "2025-04-13T05:38:08.053Z"
}
],
"nodes": [
{
"id": "f369feaf-4782-4411-9d08-fe91b9ffd97e",
"name": "When clicking ‘Test workflow’",
"type": "n8n-nodes-base.manualTrigger",
"position": [
200,
-555
],
"parameters": {},
"typeVersion": 1
},
{
"id": "231bae3c-c27e-49fc-b878-2d5cc1e14c5a",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
200,
-1020
],
"parameters": {
"width": 400,
"height": 300,
"content": "## Note\n\nDeals with the Etsy web scraping by utilizing the Bright Data Web Unlocker Product.\n\nThe Information Extractor node is being used to demonstrate the usage of the N8N AI capabilities.\n\n**Please make sure to set the Etsy search query and update the Webhook Notification URL**"
},
"typeVersion": 1
},
{
"id": "f568de40-b389-41f9-afe9-5e09a291c367",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
640,
-1020
],
"parameters": {
"width": 480,
"height": 300,
"content": "## LLM Usages\n\nGoogle Gemini 2.0 Flash Exp model is being used.\n\nInformation Extractor nodes perform the data extraction."
},
"typeVersion": 1
},
{
"id": "4f1db865-a0cb-4978-9c7d-fde448bd978a",
"name": "Set Esty Search Query",
"type": "n8n-nodes-base.set",
"position": [
420,
-555
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "3aedba66-f447-4d7a-93c0-8158c5e795f9",
"name": "url",
"type": "string",
"value": "https://www.etsy.com/search?q=wall+art+for+mum&order=date_desc&page=1&ref=pagination"
},
{
"id": "4e7ee31d-da89-422f-8079-2ff2d357a0ba",
"name": "zone",
"type": "string",
"value": "web_unlocker1"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "4cb51368-bb69-4d99-a0b6-e8e8013f1dfd",
"name": "Perform Esty Web Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
640,
-680
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "={{ $json.zone }}"
},
{
"name": "url",
"value": "={{ $json.url }}{{ $json.url.includes('?') ? '&' : '?' }}product=unlocker&method=api"
},
{
"name": "format",
"value": "raw"
},
{
"name": "data_format",
"value": "markdown"
}
]
},
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "9fb7bdc5-ba64-4df4-89b4-a3207e7f6d0e",
"name": "Google Gemini Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
-460
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "1f95576d-e243-481d-9d5f-308764d8ea4b",
"name": "Loop Over Items",
"type": "n8n-nodes-base.splitInBatches",
"position": [
1460,
-680
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "47f23aa1-63ee-49e3-a465-283c7ab71b76",
"name": "Perform Esty web request over the loop",
"type": "n8n-nodes-base.httpRequest",
"position": [
1680,
-560
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "={{ $('Set Esty Search Query').first().json.zone }}"
},
{
"name": "url",
"value": "={{ $json.url }}&product=unlocker"
},
{
"name": "format",
"value": "raw"
},
{
"name": "data_format",
"value": "markdown"
}
]
},
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "0b5ea206-a5a0-49b5-8f53-10b4dec5806c",
"name": "Initiate a Webhook Notification for the extracted data",
"type": "n8n-nodes-base.httpRequest",
"position": [
2320,
-560
],
"parameters": {
"url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "summary",
"value": "={{ $json.output }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "a164b90b-f44c-4862-b010-d515926774c7",
"name": "Extract Item List with the Product Info",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
1920,
-560
],
"parameters": {
"text": "=Extract the product info in JSON\n{{ $json.data }}",
"options": {},
"schemaType": "fromJson",
"jsonSchemaExample": "[{\n\"image\": \"https://i.etsystatic.com/34923795/r/il/8f3bba/5855230678/il_fullxfull.5855230678_n9el.jpg\",\n\"name\": \"Custom Coffee Mug with Photo\",\n\"url\": \"https://www.etsy.com/listing/1193808036/custom-coffee-mug-with-photo\",\n\"brand\": {\n\"@type\": \"Brand\",\n\"name\": \"TheGiftBucks\"\n},\n\"offers\": {\n\"@type\": \"Offer\",\n\"price\": \"14.99\",\n\"priceCurrency\": \"USD\"\n}\n}]"
},
"typeVersion": 1
},
{
"id": "c3798c64-ac53-44c8-ba91-8fe33377113d",
"name": "Google Gemini Chat Model for product info",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
2000,
-300
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "11e4ae42-d2e1-4a4b-adcf-382f9e494431",
"name": "Extract Paginated Resultset",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
860,
-680
],
"parameters": {
"text": "=Analyze and Extract the below content. Make sure to produce a unique resultset. Exclude page_numbers which are not numbers.\n{{ $json.data }}",
"options": {},
"schemaType": "manual",
"inputSchema": "{\n\"$schema\": \"http://json-schema.org/schema#\",\n\"title\": \"PagedResultSetSchema\",\n\"type\": \"array\",\n\"items\": {\n\"type\": \"object\",\n\"properties\": {\n\"page_number\": {\n\"type\": \"string\",\n\"description\": \"Page number, typically a string (e.g., '1', '2', 'next').\"\n},\n\"url\": {\n\"type\": \"string\",\n\"format\": \"uri\",\n\"description\": \"URL pointing to the page.\"\n}\n},\n\"required\": [\"page_number\", \"url\"],\n\"additionalProperties\": false\n}\n}\n"
},
"typeVersion": 1
},
{
"id": "28c1822b-d51c-4f8e-b98e-2e12324397be",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1400,
-780
],
"parameters": {
"color": 5,
"width": 1340,
"height": 620,
"content": "## Loop and Perform Paginated Etsy Data Extraction\n"
},
"typeVersion": 1
},
{
"id": "d4f18f2b-9825-4320-addb-c02bfdc4da97",
"name": "Write the scraped content to disk",
"type": "n8n-nodes-base.readWriteFile",
"position": [
2560,
-760
],
"parameters": {
"options": {},
"fileName": "=d:\\Esty-Scraped-Content-{{ $('Loop Over Items').item.json.page_number }}.json",
"operation": "write"
},
"typeVersion": 1
},
{
"id": "5555407d-c7dd-4e5c-83ab-ef6ba9c46da3",
"name": "Create a binary data",
"type": "n8n-nodes-base.function",
"position": [
2360,
-760
],
"parameters": {
"functionCode": "// Attach the first item's JSON, pretty-printed and base64-encoded, as the binary property 'data'.\nitems[0].binary = {\n  data: {\n    // Buffer.from replaces the deprecated new Buffer() constructor.\n    data: Buffer.from(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
},
"typeVersion": 1
},
{
"id": "2f7a5fab-a2f4-422e-8f83-ce50fbe2a738",
"name": "Split Out",
"type": "n8n-nodes-base.splitOut",
"position": [
1240,
-680
],
"parameters": {
"options": {},
"fieldToSplitOut": "output"
},
"typeVersion": 1
},
{
"id": "3d7a8992-b8d4-4a86-b60b-a92a7d63e31b",
"name": "Extract Paginated Resultset With OpenAI",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
880,
-120
],
"parameters": {
"text": "=Analyze and Extract the below content. Make sure to produce a unique resultset. Exclude page_numbers which are not numbers.\n{{ $json.data }}",
"options": {},
"schemaType": "manual",
"inputSchema": "{\n\"$schema\": \"http://json-schema.org/schema#\",\n\"title\": \"PagedResultSetSchema\",\n\"type\": \"array\",\n\"items\": {\n\"type\": \"object\",\n\"properties\": {\n\"page_number\": {\n\"type\": \"string\",\n\"description\": \"Page number, typically a string (e.g., '1', '2', 'next').\"\n},\n\"url\": {\n\"type\": \"string\",\n\"format\": \"uri\",\n\"description\": \"URL pointing to the page.\"\n}\n},\n\"required\": [\"page_number\", \"url\"],\n\"additionalProperties\": false\n}\n}\n"
},
"typeVersion": 1
},
{
"id": "aa42d335-67bc-4dc5-a68a-4ce93e05464a",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
880,
80
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4o-mini"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "vPKynKbDzJ5ZU4cU",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "82df0ccc-3065-4bb5-a48e-90e4dbf2162f",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
640,
-260
],
"parameters": {
"color": 6,
"width": 660,
"height": 460,
"content": "## OpenAI Extraction (Optional)\n\nNote - Replace the Google Gemini extraction above with the OpenAI Chat Model if needed.\n\nPlease make sure to set the OpenAI Chat Model -> Credential to connect with **OpenAi Account**"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "40a1bbd5-05b2-41c2-8b3c-72e3f16fd13a",
"connections": {
"Split Out": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Items": {
"main": [
[],
[
{
"node": "Perform Esty web request over the loop",
"type": "main",
"index": 0
}
]
]
},
"OpenAI Chat Model": {
"ai_languageModel": [
[
{
"node": "Extract Paginated Resultset With OpenAI",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Create a binary data": {
"main": [
[
{
"node": "Write the scraped content to disk",
"type": "main",
"index": 0
}
]
]
},
"Set Esty Search Query": {
"main": [
[
{
"node": "Perform Esty Web Request",
"type": "main",
"index": 0
}
]
]
},
"Google Gemini Chat Model": {
"ai_languageModel": [
[
{
"node": "Extract Paginated Resultset",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Perform Esty Web Request": {
"main": [
[
{
"node": "Extract Paginated Resultset",
"type": "main",
"index": 0
}
]
]
},
"Extract Paginated Resultset": {
"main": [
[
{
"node": "Split Out",
"type": "main",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "Set Esty Search Query",
"type": "main",
"index": 0
}
]
]
},
"Perform Esty web request over the loop": {
"main": [
[
{
"node": "Extract Item List with the Product Info",
"type": "main",
"index": 0
}
]
]
},
"Extract Item List with the Product Info": {
"main": [
[
{
"node": "Initiate a Webhook Notification for the extracted data",
"type": "main",
"index": 0
},
{
"node": "Create a binary data",
"type": "main",
"index": 0
}
]
]
},
"Google Gemini Chat Model for product info": {
"ai_languageModel": [
[
{
"node": "Extract Item List with the Product Info",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Initiate a Webhook Notification for the extracted data": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
}
}
}
功能特点
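- 手动触发,按设定的 Etsy 搜索 URL 抓取结果页
- 通过 Bright Data Web Unlocker 以 Markdown 格式获取页面内容
- 使用 Google Gemini 提取分页链接和商品信息(可选改用 OpenAI)
- 逐页循环处理分页结果
- 将提取结果发送到 Webhook 并写入本地 JSON 文件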
技术分析
节点类型及作用
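- Manual Trigger(n8n-nodes-base.manualTrigger):手动启动工作流
- Sticky Note(n8n-nodes-base.stickyNote):画布上的说明注释
- Set(n8n-nodes-base.set):设置 Etsy 搜索 URL 和 Bright Data zone
- HTTP Request(n8n-nodes-base.httpRequest):调用 Bright Data API 抓取页面,并发送 Webhook 通知
- Google Gemini Chat Model(@n8n/n8n-nodes-langchain.lmChatGoogleGemini):为信息提取节点提供语言模型
- Information Extractor(@n8n/n8n-nodes-langchain.informationExtractor):提取分页链接和商品信息
- Split Out / Loop Over Items(splitOut、splitInBatches):拆分分页结果并逐页循环
- Function / Read/Write File(function、readWriteFile):生成二进制数据并写入磁盘
- OpenAI Chat Model(@n8n/n8n-nodes-langchain.lmChatOpenAi):可选的替代语言模型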
复杂度评估
配置难度:
维护难度:
扩展性:
实施指南
前置条件
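- n8n平台访问权限
- Bright Data 账户及 Web Unlocker zone(示例中为 web_unlocker1),并配置用于 Bright Data API 的 Header Auth 凭证
- Google Gemini(PaLM) API 凭证
- 可接收通知的 Webhook URL
- 可选:OpenAI API 凭证(使用 OpenAI 提取分支时)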
配置步骤
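- 在n8n中导入工作流JSON文件
- 为两个调用 Bright Data 的 HTTP Request 节点配置 Header Auth 凭证
- 为 Google Gemini Chat Model 节点配置 Google Gemini(PaLM) API 凭证
- 在 "Set Esty Search Query" 节点中设置 Etsy 搜索 URL 和 zone
- 更新 Webhook 通知 URL,并按运行环境调整写入文件的路径
- 点击 "Test workflow" 手动执行并验证结果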
关键参数
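| 参数名称 | 默认值 | 说明 |
|---|---|---|
| url | https://www.etsy.com/search?q=wall+art+for+mum&order=date_desc&page=1&ref=pagination | 要抓取的 Etsy 搜索结果页 URL |
| zone | web_unlocker1 | Bright Data Web Unlocker 的 zone 名称 |
| data_format | markdown | Bright Data 返回内容的格式 |
| modelName | models/gemini-2.0-flash-exp | 用于信息提取的 Gemini 模型 |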
最佳实践
优化建议
- 按需将实验性模型 gemini-2.0-flash-exp 替换为稳定版本的 Gemini 模型
- 根据分页数量调整 Loop Over Items 的批次大小
- 在提取提示词中明确需要的字段,减少无关输出
- 定期检查 Etsy 页面结构的变化并更新提取 Schema 示例
安全注意事项
- 妥善保管API密钥和认证信息
- 限制工作流的访问权限
- 定期审查处理日志
- 不要在分享的工作流中保留真实的 Webhook URL 和凭证信息
性能优化
- 使用增量处理减少重复工作
- 缓存频繁访问的数据
- 分页较多时合理控制批次大小与请求频率
- 监控系统资源使用情况
故障排除
常见问题
未提取到分页或商品信息
检查 Bright Data 请求返回的 Markdown 内容是否为空,并确认提取提示词和 Schema 与页面内容匹配。
Bright Data 或 Gemini 认证失败
确认 Header Auth 凭证和 Google Gemini(PaLM) API 凭证有效,并检查 zone 名称是否正确。
调试技巧
- 启用详细日志记录查看每个步骤的执行情况
- 使用单个 Etsy 搜索 URL 验证抓取与提取逻辑
- 检查网络连接和API服务状态
- 逐步执行工作流定位问题节点
错误处理
当前工作流未配置专门的错误处理机制(节点均未启用失败重试或错误分支),建议按需补充:
- 为 HTTP Request 节点启用失败重试(例如最多3次)
- 记录 API 错误并发送告警
- 隔离处理失败的分页,便于之后重新抓取
- 写入文件或发送通知失败时进行补偿处理